### This R script is to plot repayment rates, i.e. Figure 4 Panels (a) and (b)

##########################################################################################
## Load packages (note: install them first if not yet installed)
##########################################################################################

#install.packages("tidyverse")
#install.packages("ggplot2") 
#install.packages("foreign")
#install.packages("doBy")
#install.packages("data.table")

library(tidyverse)
library(ggplot2)
library(foreign)
library(doBy)
library(data.table)

# Unused but keep in case
#install.packages("gridExtra")
#install.packages("ggpubr")
#library(gridExtra)
#library(ggpubr)

##########################################################################################
## Run data work
##########################################################################################

# Load the extended repayment dataset (Stata .dta file).
# NOTE(review): this is an absolute, machine-specific path; adjust before running elsewhere.
repay_main_extend <- read.dta("/Users/rpickmans/Library/CloudStorage/Dropbox/Fenix Solar/02-schoolFeeLoans/Replication/data/repayment/fenix_repay_extend_07172020_strict_rep.dta")

# Keep only observations with loandayselapsed of at most 200
# (subset() also drops rows where loandayselapsed is NA)
repay_main_extend <- subset(repay_main_extend, loandayselapsed <= 200)

# Make treatment assignment labels simpler.
# read.dta() converts value-labelled Stata variables to factors by default, and
# assigning a string that is not an existing level to a factor produces NA.
# Recode on a character vector so the relabelling is safe in either case.
repay_main_extend$treatmenttype_sh <- as.character(repay_main_extend$treatmenttype_sh)

treatment_labels <- c(
  "R T1-L" = "Secured",
  "R T1-U" = "Surprise Unsecured",
  "R T2-U" = "Unsecured",
  "R T3"   = "Choice",
  "R C"    = "Control"
)
# Only touch rows whose code appears in the lookup; any other value is left as is
recode_hit <- repay_main_extend$treatmenttype_sh %in% names(treatment_labels)
repay_main_extend$treatmenttype_sh[recode_hit] <-
  unname(treatment_labels[repay_main_extend$treatmenttype_sh[recode_hit]])

# Ordered-factor copy of the treatment label; the level order controls the order
# in which groups appear in the graphs (values outside these levels become NA)
repay_main_extend$treatmenttype_fac <- ordered(
  repay_main_extend$treatmenttype_sh,
  levels = c("Secured", "Surprise Unsecured", "Unsecured", "Choice", "Control")
)

# Make versions that are without control, and without control and choice.
# which() drops rows whose treatment label is NA.
repay_main_extend_noctrl <-
  repay_main_extend[which(repay_main_extend$treatmenttype_sh != "Control"), ]
repay_main_extend_noctrlnochoice <-
  repay_main_extend_noctrl[which(repay_main_extend_noctrl$treatmenttype_sh != "Choice"), ]

# Create a summary dataset that collects, at each value of loandayselapsed and for each
# treatment group, the mean and standard deviation of frac_lpp_maxip.
# Resulting columns: frac_lpp_maxip.mean and frac_lpp_maxip.sd.
rr_data <- summaryBy(
  frac_lpp_maxip ~ loandayselapsed + treatmenttype_fac,
  FUN = c(mean, sd),
  data = repay_main_extend_noctrlnochoice
)

# I also want differences between repayment rates.
# Split the summary by treatment group.
piece1 <- subset(rr_data, treatmenttype_fac == "Secured")
piece2 <- subset(rr_data, treatmenttype_fac == "Surprise Unsecured")
piece3 <- subset(rr_data, treatmenttype_fac == "Unsecured")

# Build one row per loandayselapsed holding the three group means.
# The groups are joined on loandayselapsed (not matched by row position), so a day
# missing from one group gives an NA difference instead of a misaligned one.
rr_wide <- merge(
  merge(
    data.table(
      loandayselapsed = piece1[["loandayselapsed"]],
      mean.locked = piece1[["frac_lpp_maxip.mean"]]
    ),
    data.table(
      loandayselapsed = piece2[["loandayselapsed"]],
      mean.surpriseunlocked = piece2[["frac_lpp_maxip.mean"]]
    ),
    by = "loandayselapsed", all = TRUE
  ),
  data.table(
    loandayselapsed = piece3[["loandayselapsed"]],
    mean.unlocked = piece3[["frac_lpp_maxip.mean"]]
  ),
  by = "loandayselapsed", all = TRUE
)

# Take differences and stack them in long form, one block of rows per effect type:
#   Total Effect = Secured - Unsecured
#   Moral Hazard = Secured - Surprise Unsecured
#   Selection    = Surprise Unsecured - Unsecured
n_days <- nrow(rr_wide)
rr_data2 <- rbindlist(list(
  data.table(
    loandayselapsed = rr_wide$loandayselapsed,
    diff = rr_wide$mean.locked - rr_wide$mean.unlocked,
    type = rep("Total Effect", n_days)
  ),
  data.table(
    loandayselapsed = rr_wide$loandayselapsed,
    diff = rr_wide$mean.locked - rr_wide$mean.surpriseunlocked,
    type = rep("Moral Hazard", n_days)
  ),
  data.table(
    loandayselapsed = rr_wide$loandayselapsed,
    diff = rr_wide$mean.surpriseunlocked - rr_wide$mean.unlocked,
    type = rep("Selection", n_days)
  )
))

# Factorize the effect type, to order as Total Effect, Moral Hazard, Selection
rr_data2$type_fac <- ordered(
  rr_data2$type,
  levels = c("Total Effect", "Moral Hazard", "Selection")
)



############################

# Graph 1: repayment rates
# One line per treatment group showing mean frac_lpp_maxip against loandayselapsed.
# The unnamed colour values are matched to the groups present in rr_data in scale order.
# Limits set on scale_*_continuous() drop observations outside the range (no zooming).
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines; `size` is
# kept here so the script also runs on older ggplot2 versions.
myplot <- ggplot() +
  geom_line(
    data = rr_data,
    aes(
      x = loandayselapsed,
      y = frac_lpp_maxip.mean,
      group = treatmenttype_fac,
      color = treatmenttype_fac
    ),
    size = 0.5
  ) +
  scale_color_manual(values = c("red", "orange", "green")) +
  xlab("Days elapsed since loan creation") +
  ylab("Fraction of principal repaid") +
  scale_y_continuous(breaks = c(0.24, 0.36, 0.48, 0.60, 0.72), limits = c(0.24, 0.72)) +
  scale_x_continuous(breaks = c(50, 100, 150, 200), limits = c(50, 200)) +
  labs(color = "Assigned treatment group") +
  theme(
    # Legend anchored inside the panel at the bottom-right corner
    legend.justification = c("right", "bottom"),
    legend.position = c(1.00, .00),
    legend.background = element_rect(fill = NA),
    panel.background = element_rect(fill = NA),
    panel.border = element_rect(fill = NA, color = "grey75"),
    axis.ticks = element_line(color = "grey85"),
    axis.title.x = element_text(vjust = -1),
    axis.title.y = element_text(vjust = 3),
    plot.margin = unit(c(1, 1, 1, 1), "cm"),
    # The legend title set in labs() is hidden
    legend.title = element_blank(),
    legend.text = element_text(size = 6),
    legend.key.size = unit(0.5, "line"),
    plot.title = element_text(size = 10),
    text = element_text(size = 10)
  )
# Figure 4 Panel (a)
# Pass the plot explicitly so the saved file does not depend on last_plot()
ggsave(
  filename = "/Users/rpickmans/Library/CloudStorage/Dropbox/Fenix Solar/02-schoolFeeLoans/Replication/figures/repaymentratesgrey.pdf",
  plot = myplot,
  width = 4,
  height = 3
)

# Graph 2: difference in repayment rates
# One line per effect type (type_fac) showing the difference in group means (diff)
# against loandayselapsed.
# The unnamed colour values are matched to the effect types in scale order.
# Limits set on scale_*_continuous() drop observations outside the range (no zooming).
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines; `size` is
# kept here so the script also runs on older ggplot2 versions.
myplot2 <- ggplot() +
  geom_line(
    data = rr_data2,
    aes(
      x = loandayselapsed,
      y = diff,
      group = type_fac,
      color = type_fac
    ),
    size = 0.5
  ) +
  scale_color_manual(values = c("red", "black", "blue")) +
  xlab("Days elapsed since loan creation") +
  ylab("Difference in fraction of principal repaid") +
  scale_y_continuous(
    breaks = c(0.01, 0.03, 0.05, 0.07, 0.09, 0.11, 0.13),
    limits = c(0.01, 0.13)
  ) +
  scale_x_continuous(breaks = c(50, 100, 150, 200), limits = c(50, 200)) +
  labs(color = "Type of effect") +
  theme(
    # Legend anchored inside the panel at the bottom-right corner
    legend.justification = c("right", "bottom"),
    legend.position = c(1.00, .00),
    legend.background = element_rect(fill = NA),
    panel.background = element_rect(fill = NA),
    panel.border = element_rect(fill = NA, color = "grey75"),
    axis.ticks = element_line(color = "grey85"),
    axis.title.x = element_text(vjust = -1),
    axis.title.y = element_text(vjust = 3),
    plot.margin = unit(c(1, 1, 1, 1), "cm"),
    # The legend title set in labs() is hidden
    legend.title = element_blank(),
    legend.text = element_text(size = 6),
    legend.key.size = unit(0.5, "line"),
    plot.title = element_text(size = 10),
    text = element_text(size = 10)
  )
# Figure 4 Panel (b)
# Pass the plot explicitly so the saved file does not depend on last_plot()
ggsave(
  filename = "/Users/rpickmans/Library/CloudStorage/Dropbox/Fenix Solar/02-schoolFeeLoans/Replication/figures/diff_repayments.pdf",
  plot = myplot2,
  width = 4,
  height = 3
)
